
######################
## PACKAGES
######################
library(dplyr)
library(haven)
library(Hmisc)
library(MASS)
library(foreign)


######################
## DATA
######################

Valg1965 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/VAlgundersøkelsen 1965/Valgundersøkelsen 1965.dta")
Valg1969 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersøkelsen 1969/Valgundersøkelsen 1969.dta")
Valg1973 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1973/Folkeavstemningen om EF 1972 og Stortingsvalget 1973.dta")
Valg1977 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1977/Valgundersøkelsen 1977.dta")
Valg1981 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1981/Valgundersøkelsen 1981.dta")
Valg1985 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1985/Valgundersøkelsen 1985.dta")
Valg1989 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1989/Valgundersøkelsen 1989.dta")
Valg1993 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1993/Valgundersøkelsen 1993.dta")
Valg1997 <- read.dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 1997/Valgundersøkelsen 1997.dta") # Must use read.dta
Valg2001 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 2001/Valgundersøkelsen 2001.dta")
Valg2005 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgunersokelsen 2005/Valgundersøkelsen 2005.dta")
Valg2009 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 2009/Valgundersøkelsen 2009.dta")
Valg2013 <- read_dta("Desktop/PHD/Article projects/Datasets/VALGUNDERSOKELSENE/Valgundersokelsen 2013/Valgundersøkelsen 2013.dta")


#######################################################
## STEP 1: Find covariance between income and sex 
#######################################################

################################## ELECTION STUDY 1965 ##################################
d <- Valg1965
# Harmonize election study data
table(d$v291)
d$income <- d$v291
table(d$sex)
d$sex <- car::recode(d$v264,"1=1;2=0;else=NA")

###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_1965<-cor(d$score_inc,d$sex,use="complete.obs")

##########################
cov_1965 <- cov(d$score_inc,d$sex)
var_inc_1965 <- var(d$score_inc)
var_sex_1965 <- var(d$sex)


################################## ELECTION STUDY 1969 ##################################
d <- Valg1969
# Income and sex variables
table(d$v428)
d$income <- d$v428
table(d$v386)
d$sex <- car::recode(d$v386,"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_1969<-cor(d$score_inc,d$sex,use="complete.obs")
cov_1969 <- cov(d$score_inc,d$sex)
var_inc_1969 <- var(d$score_inc)
var_sex_1969 <- var(d$sex)



################################## ELECTION STUDY 1973 ##################################
d <- Valg1973
# Income and sex variables
table(d$v248)
d$income <- d$v248
table(d$v184)
d$sex <- car::recode(d$v184,"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_1973<-cor(d$score_inc,d$sex,use="complete.obs")

cov_1973 <- cov(d$score_inc,d$sex)
var_inc_1973 <- var(d$score_inc)
var_sex_1973 <- var(d$sex)

################################## ELECTION STUDY 1977 ##################################
d <- Valg1977
# Income and sex variables
table(d$v320)
d$income <- d$v320
table(d$v002)
d$sex <- car::recode(d$v002,"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_1977<-cor(d$score_inc,d$sex,use="complete.obs")

cov_1977 <- cov(d$score_inc,d$sex)
var_inc_1977 <- var(d$score_inc)
var_sex_1977 <- var(d$sex)



################################## ELECTION STUDY 1981 ##################################
d <- Valg1981
# Income and sex variables
table(d$var408)
d$income <- car::recode(d$var408,"8:hi=NA")
table(d$var013)
d$sex <- car::recode(d$var013,"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_1981<-cor(d$score_inc,d$sex,use="complete.obs")

cov_1981 <- cov(d$score_inc,d$sex)
var_inc_1981 <- var(d$score_inc)
var_sex_1981 <- var(d$sex)



################################## ELECTION STUDY 1985 ##################################
d <- Valg1985
# Income and sex variables
table(d$V486)
d$income <- car::recode(d$V486,"9:hi=NA")
table(d$V16)
d$sex <- car::recode(d$V16,"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_1985<-cor(d$score_inc,d$sex,use="complete.obs")

cov_1985 <- cov(d$score_inc,d$sex)
var_inc_1985 <- var(d$score_inc)
var_sex_1985 <- var(d$sex)


################################## ELECTION STUDY 1989 ##################################
d <- Valg1989
# Income and sex variables
table(d$V534)
d$income <- car::recode(d$V534,"8:9=NA")
table(d$V16)
d$sex <- car::recode(d$V16,"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_1989<-cor(d$score_inc,d$sex,use="complete.obs")

cov_1989 <- cov(d$score_inc,d$sex)
var_inc_1989 <- var(d$score_inc)
var_sex_1989 <- var(d$sex)



################################## ELECTION STUDY 1993 ##################################
d <- Valg1993
# Income and sex variables
table(d$v288)
d$income <- d$v288
table(d$v004)
d$sex <- car::recode(d$v004,"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_1993<-cor(d$score_inc,d$sex,use="complete.obs")

cov_1993 <- cov(d$score_inc,d$sex)
var_inc_1993 <- var(d$score_inc)
var_sex_1993 <- var(d$sex)


################################## ELECTION STUDY 1997 ##################################
d <- Valg1997
# Income and sex variables
table(d$v222)
d$income <- d$v222
table(as.numeric(d$v225))
d$sex <- car::recode(as.numeric(d$v225),"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_1997<-cor(d$score_inc,d$sex,use="complete.obs")

cov_1997 <- cov(d$score_inc,d$sex)
var_inc_1997 <- var(d$score_inc)
var_sex_1997 <- var(d$sex)



################################## ELECTION STUDY 2001 ##################################
d <- Valg2001
# Income and sex variables
table(d$v461)
d$income <- car::recode(d$v461,"0=NA;999999:hi=NA")
table(d$v005)
d$sex <- car::recode(d$v005,"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_2001<-cor(d$score_inc,d$sex,use="complete.obs")

cov_2001 <- cov(d$score_inc,d$sex)
var_inc_2001 <- var(d$score_inc)
var_sex_2001 <- var(d$sex)




################################## ELECTION STUDY 2005 ##################################
d <- Valg2005
# Income and sex variables
table(d$v294)
d$income <- car::recode(d$v294,"0=NA;99999998:hi=NA")
table(d$v005)
d$sex <- car::recode(d$v005,"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_2005<-cor(d$score_inc,d$sex,use="complete.obs")

cov_2005 <- cov(d$score_inc,d$sex)
var_inc_2005 <- var(d$score_inc)
var_sex_2005 <- var(d$sex)


################################## ELECTION STUDY 2009 ##################################
d <- Valg2009
# Income and sex variables
table(d$v296)
d$income <- car::recode(d$v296,"0=NA;999999:hi=NA")
table(d$v006)
d$sex <- car::recode(d$v006,"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_2009<-cor(d$score_inc,d$sex,use="complete.obs")

cov_2009 <- cov(d$score_inc,d$sex)
var_inc_2009 <- var(d$score_inc)
var_sex_2009 <- var(d$sex)



################################## ELECTION STUDY 2013 ##################################
d <- Valg2013
# Income and sex variables
table(d$inntekt)
d$income <- d$inntekt
table(d$sex)
d$sex <- car::recode(d$Kjonn,"1=1;2=0;else=NA")
###########################
# Create midpoint score
d1 <- as.data.frame(table(d$income))
d1$prop <- d1$Freq/sum(d1$Freq, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score_inc = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score_inc[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- d1 %>% dplyr::select(income,score_inc)
# Apply to dataset
d <- merge(d,d1,by="income")
###########################
cor_2013<-cor(d$score_inc,d$sex,use="complete.obs")

cov_2013 <- cov(d$score_inc,d$sex)
var_inc_2013 <- var(d$score_inc)
var_sex_2013 <- var(d$sex)



#######################################################################################
################################## COMBINE ############################################

#cors <- c(cor_1965,cor_1969,cor_1973,cor_1977,cor_1981,cor_1985,
#          cor_1989,cor_1993,cor_1997,cor_2001,cor_2005,cor_2009,cor_2013)
#year <- c(seq(1965,2013,4))
#corsdat <- data.frame(year,cors)
#write.xlsx(corsdat,"correlations_gender_house.xlsx")

covs <- c(cov_1965,cov_1969,cov_1973,cov_1977,cov_1981,cov_1985,
          cov_1989,cov_1993,cov_1997,cov_2001,cov_2005,cov_2009,cov_2013)
year <- c(seq(1965,2013,4))
covsdat <- data.frame(year,covs)
ggplot(covsdat,aes(year,covs)) +
  geom_smooth(method = "loess") +
  scale_y_continuous(limits=c(0,0.04))
m1 <- loess(covs~year,data=covsdat)
preds<-data.frame(year=seq(1966,2014,1))
preds$cov<-predict(m1,preds)
preds$cov[preds$year==2014] <- preds$cov[preds$year==2013]
covariances_gender_house <- preds


# Variances: Income
var_inc <- c(var_inc_1965,var_inc_1969,var_inc_1973,var_inc_1977,var_inc_1981,var_inc_1985,
          var_inc_1989,var_inc_1993,var_inc_1997,var_inc_2001,var_inc_2005,var_inc_2009,var_inc_2013)
year <- c(seq(1965,2013,4))
varsdat <- data.frame(year,var_inc)
ggplot(varsdat,aes(year,var_inc)) +
  geom_smooth(method = "loess") +
  scale_y_continuous(limits=c(0,0.04))
m1 <- loess(var_inc~year,data=varsdat)
preds<-data.frame(year=seq(1966,2014,1))
preds$var_inc<-predict(m1,preds)
preds$var_inc[preds$year==2014] <- preds$vars[preds$year==2013]
variances_income <- preds

# Variances: Sex
var_sex <- c(var_sex_1965,var_sex_1969,var_sex_1973,var_sex_1977,var_sex_1981,var_sex_1985,
          var_sex_1989,var_sex_1993,var_sex_1997,var_sex_2001,var_sex_2005,var_sex_2009,var_sex_2013)
year <- c(seq(1965,2013,4))
varsdat <- data.frame(year,var_sex)
ggplot(varsdat,aes(year,vars)) +
  geom_smooth(method = "loess")
m1 <- loess(var_sex~year,data=varsdat)
preds<-data.frame(year=seq(1966,2014,1))
preds$var_sex<-predict(m1,preds)
preds$var_sex[preds$year==2014] <- preds$var_sex[preds$year==2013]
variances_gender <- preds

covariances <- merge(covariances_gender_house,variances_gender,by="year")
covariances <- merge(covariances,variances_income,by="year")

write.xlsx(covariances,"NES_covariances_Norway.xlsx")

